from IPython import get_ipython
ipython = get_ipython()
# Automatically reload the HookedTransformer code as it is edited, without restarting the kernel
ipython.magic("load_ext autoreload")
ipython.magic("autoreload 2")
import os
import torch
import pandas as pd
import plotly.express as px
import transformer_lens.utils as utils
import plotly.io as pio
import plotly.express as px
import plotly.graph_objects as go
# =============================================================================
pio.renderers.default = 'png' # USE IF MAKING GRAPHS FOR NOTEBOOK EXPORT
# =============================================================================
from neel_plotly import line, imshow, scatter
def l_imshow(tensor, renderer=None, **kwargs):
    """Show `tensor` as a heatmap using a red/blue scale centred on zero.

    The tensor is converted with `utils.to_numpy`; any extra keyword
    arguments are forwarded to `px.imshow`. `renderer` is passed to the
    figure's `show` call.
    """
    heatmap = px.imshow(
        utils.to_numpy(tensor),
        color_continuous_midpoint=0.0,
        color_continuous_scale="RdBu",
        **kwargs,
    )
    heatmap.show(renderer)
def l_line(tensor, renderer=None, width=1200, height=500, **kwargs):
    """Show `tensor` as a fixed-size line plot.

    The tensor is converted with `utils.to_numpy` and used as the y values;
    extra keyword arguments (e.g. `x`, `title`, `log_x`) are forwarded to
    `px.line`. Autosizing is disabled so the figure is exactly
    `width` x `height`.
    """
    y_values = utils.to_numpy(tensor)
    fig = px.line(y=y_values, **kwargs)
    fig.update_layout(autosize=False, width=width, height=height)
    fig.show(renderer=renderer)
def l_scatter(x, y, xaxis="", yaxis="", caxis="", renderer=None, **kwargs):
    """Show a scatter plot of `y` against `x`.

    Both inputs are converted with `utils.to_numpy`. `xaxis`, `yaxis` and
    `caxis` become the display labels for the x, y and color dimensions;
    extra keyword arguments are forwarded to `px.scatter`.
    """
    axis_labels = {"x": xaxis, "y": yaxis, "color": caxis}
    fig = px.scatter(
        x=utils.to_numpy(x),
        y=utils.to_numpy(y),
        labels=axis_labels,
        **kwargs,
    )
    fig.show(renderer)
def two_lines(tensor1, tensor2, renderer=None, **kwargs):
    """Show two tensors as two lines on a single `px.line` figure.

    Each tensor is converted with `utils.to_numpy`; extra keyword arguments
    are forwarded to `px.line`.
    """
    both_series = [utils.to_numpy(t) for t in (tensor1, tensor2)]
    fig = px.line(y=both_series, **kwargs)
    fig.show(renderer)
def get_checkpoint_names(mode: str = "linear"):
    """Return the list of checkpoint step numbers for a sampling `mode`.

    Modes:
        "linear": every 1000 steps from 1000 to 143000.
        "exponential": powers of two 2**0 .. 2**17, where values above 1000
            are rounded to the nearest multiple of 1000.
        "exp_plus_detail": powers of two up to 512, then every 1000 steps up
            to 15000, every 5000 steps from 15000 to 65000, and every 10000
            steps from 70000 to 140000 (15000 therefore appears twice).

    Any other mode yields the placeholder list [1, 2].
    """
    if mode == "linear":
        return [step * 1000 for step in range(1, 144)]

    if mode == "exponential":
        powers = (2**exponent for exponent in range(18))
        return [
            power if power <= 1000 else round(power / 1000) * 1000
            for power in powers
        ]

    if mode == "exp_plus_detail":
        early = [2**exponent for exponent in range(10)]
        every_1k = [step * 1000 for step in range(1, 16)]
        every_5k = [step * 5000 for step in range(3, 14)]
        every_10k = [step * 10000 for step in range(7, 15)]
        return early + every_1k + every_5k + every_10k

    # Unrecognised mode: fall back to the two-element placeholder list.
    return [1, 2]
def plot_lines(data_series, labels, colors, x_vals, x_title='X', y_title='Y', title='Line plot'):
    """Draw several series on one figure with a log-scaled x axis.

    `data_series`, `labels` and `colors` are consumed in lockstep with
    `zip`, so one line is drawn per complete (series, label, color) triple
    and surplus entries in any of them are ignored. Every line shares
    `x_vals`. The figure is fixed at 1200x500 and shown immediately.
    """
    fig = go.Figure()
    for y_values, trace_name, trace_color in zip(data_series, labels, colors):
        trace = go.Scatter(
            x=x_vals,
            y=y_values,
            mode='lines',
            name=trace_name,
            line={'color': trace_color},
        )
        fig.add_trace(trace)

    fig.update_layout(title=title, width=1200, height=500)
    fig.update_xaxes(type='log', title=x_title)
    fig.update_yaxes(title=y_title)
    fig.show()
def load_metrics(directory):
    """Load saved metric tensors from a `<model>/<task>/<metric>.pt` tree.

    Args:
        directory: Root folder containing one subfolder per model run.

    Returns:
        A nested dict `{model_key: {task_folder: {metric_name: tensor}}}`.
        `model_key` is the model folder name with its last two
        '-'-separated words dropped (e.g. the trailing "no-dropout");
        `metric_name` is the file name without the ".pt" suffix.

    Notes:
        * '.ipynb_checkpoints' folders and non-directory entries are skipped.
        * A folder name with fewer than three '-'-separated words would
          otherwise collapse to the empty key '', so the full folder name is
          used as the key instead.
        * Folders that map to the same key are merged rather than the later
          one silently replacing the earlier one.
        * Entries are visited in sorted order so the result is deterministic.
    """
    ignored = '.ipynb_checkpoints'
    nested_dict = {}

    for model_folder in sorted(os.listdir(directory)):
        model_path = os.path.join(directory, model_folder)
        if model_folder == ignored or not os.path.isdir(model_path):
            continue

        # Drop the last two words ("no-dropout"); fall back to the whole
        # name when nothing would be left.
        model_key = '-'.join(model_folder.split('-')[:-2]) or model_folder
        model_metrics = nested_dict.setdefault(model_key, {})

        for task_folder in sorted(os.listdir(model_path)):
            task_path = os.path.join(model_path, task_folder)
            if task_folder == ignored or not os.path.isdir(task_path):
                continue

            task_metrics = model_metrics.setdefault(task_folder, {})
            for file in sorted(os.listdir(task_path)):
                if not file.endswith('.pt'):
                    continue
                file_path = os.path.join(task_path, file)
                # map_location keeps results saved from a GPU loadable on a
                # CPU-only machine.
                loaded = torch.load(file_path, map_location='cpu')
                task_metrics[file[:-3]] = torch.Tensor(loaded)

    return nested_dict
# Usage example: load every saved metric once; the plotting code below reads
# from `metrics`.
directory_path = 'results'  # Replace with your directory path
metrics = load_metrics(directory_path)
metrics.keys()
# Recorded notebook output of the call above (kept as a comment: it is cell
# output, not code, and `dict_keys` is not a name that can be called):
# dict_keys(['pythia-160m-data-seed2', 'pythia-160m-data-seed3', 'pythia-160m-weight-seed3', 'pythia-160m-weight-seed1', 'pythia-70m', 'pythia-160m-seed3', 'pythia-2.8b', '', 'pythia-160m-data-seed1', 'pythia-410m', 'pythia-160m-seed1', 'pythia-160m-weight-seed2', 'pythia-1.4b', 'pythia-160m', 'pythia-160m-seed2'])
metrics['pythia-160m']['ioi'].keys()
# Recorded notebook output:
# dict_keys(['rank_0', 'accuracy', 'probability_diff', 'logit_diff', 'probability_mass'])
# Pythia-70M, IOI task: derive the normalised metrics, draw one combined
# overview plot, then one line plot per raw metric. All plots use the
# training checkpoints as x values on a log-scaled axis.
_task = metrics['pythia-70m']['ioi']
_task['normed_logit_diff'] = _task['logit_diff'] / _task['logit_diff'].max()
_task['normed_accuracy'] = (_task['accuracy'] - 0.5) / (
    _task['accuracy'].max() - _task['accuracy'].min()
)

ckpts = get_checkpoint_names("exp_plus_detail")
labels = ['Normed Logit Diff', 'Normed Accuracy', 'Correct Rank0 Rate', 'Probability Mass', 'Probability Diff']
colors = ['red', 'blue', 'green', 'orange', 'purple']
data_series = [
    _task[_key]
    for _key in ('normed_logit_diff', 'normed_accuracy', 'rank_0', 'probability_mass', 'probability_diff')
]
plot_lines(data_series, labels, colors, x_vals=ckpts, x_title='Steps', y_title='Metric Value', title='IOI Metrics in 70M Model Over Training Time (Log Scale)')

for _key, _title in (
    ('logit_diff', "Pythia-70M IOI Logit Diff Over Training Time (Log Scale)"),
    ('accuracy', "Pythia-70M Accuracy Over Training Time (Log Scale)"),
    ('rank_0', "Pythia-70M Correct Rank 0 Rate Over Training Time (Log Scale)"),
    ('probability_mass', "Pythia-70M Probability Mass Over Training Time (Log Scale)"),
):
    l_line(_task[_key], title=_title, x=ckpts, log_x=True)
# Pythia-160M: per-task metric overviews followed by cross-task comparisons.
# All plots use the training checkpoints as x values on a log-scaled axis.
ckpts = get_checkpoint_names("exp_plus_detail")
_model = metrics['pythia-160m']

# Each entry: task key, title of the combined overview plot, and the
# (metric key, title) pairs drawn afterwards as individual line plots.
_sections = (
    (
        'ioi',
        'IOI Metrics in 160M Model Over Training Time (Log Scale)',
        (
            ('logit_diff', "Pythia-160M IOI Logit Diff Over Training Time (Log Scale)"),
            ('accuracy', "Pythia-160M Accuracy Over Training Time (Log Scale)"),
            ('rank_0', "Pythia-160M Correct Rank 0 Rate Over Training Time (Log Scale)"),
            ('probability_mass', "Pythia-160M Probability Mass Over Training Time (Log Scale)"),
            ('probability_diff', "Pythia-160M Probability Diff Over Training Time (Log Scale)"),
        ),
    ),
    (
        'greater_than',
        'Greater-Than Metrics in 160M Model Over Training Time (Log Scale)',
        (
            ('logit_diff', "Pythia-160M Greater-Than Logit Diff Over Training Time (Log Scale)"),
        ),
    ),
    (
        'sentiment_cont',
        'Sentiment Cont Metrics in 160M Model Over Training Time (Log Scale)',
        (
            ('logit_diff', "Pythia-160M Simple Sentiment Logit Diff Over Training Time (Log Scale)"),
            ('accuracy', "Pythia-160M Simple Sentiment Accuracy Over Training Time (Log Scale)"),
        ),
    ),
    (
        'sentiment_class',
        'Sentiment Classification Metrics in 160M Model Over Training Time (Log Scale)',
        (
            ('logit_diff', "Pythia-160M Sentiment Classification Logit Diff Over Training Time (Log Scale)"),
            ('accuracy', "Pythia-160M Sentiment Classification Accuracy Over Training Time (Log Scale)"),
        ),
    ),
)

for _name, _overview_title, _detail_plots in _sections:
    _task = _model[_name]
    _task['normed_logit_diff'] = _task['logit_diff'] / _task['logit_diff'].max()
    if _name == 'greater_than':
        # Only two series are plotted for this task; plot_lines zips its
        # inputs, so the surplus colours below are simply ignored.
        data_series = [_task['normed_logit_diff'], _task['prob_diff']]
        labels = ['Normed Logit Diff', 'Probability Diff']
    else:
        _task['normed_accuracy'] = (_task['accuracy'] - 0.5) / (
            _task['accuracy'].max() - _task['accuracy'].min()
        )
        data_series = [
            _task[_key]
            for _key in ('normed_logit_diff', 'normed_accuracy', 'rank_0', 'probability_mass', 'probability_diff')
        ]
        labels = ['Normed Logit Diff', 'Normed Accuracy', 'Correct Rank0 Rate', 'Probability Mass', 'Probability Diff']
    colors = ['red', 'blue', 'green', 'orange', 'purple']
    plot_lines(data_series, labels, colors, x_vals=ckpts, x_title='Steps', y_title='Metric Value', title=_overview_title)
    for _key, _title in _detail_plots:
        l_line(_task[_key], title=_title, x=ckpts, log_x=True)

# Cross-task comparisons: one line per task on a shared figure.
_tasks = ('ioi', 'greater_than', 'sentiment_cont', 'sentiment_class')
labels = ['IOI', 'Greater-Than', 'Sentiment Cont', 'Sentiment Class']
colors = ['red', 'blue', 'green', 'orange']

data_series = [_model[_name]['normed_logit_diff'] for _name in _tasks]
plot_lines(data_series, labels, colors, x_vals=ckpts, x_title='Steps', y_title='Metric Value', title='Normalized Task Logit Diff in 160M Model Over Training Time (Log Scale)')

data_series = [_model[_name]['logit_diff'] for _name in _tasks]
plot_lines(data_series, labels, colors, x_vals=ckpts, x_title='Steps', y_title='Metric Value', title='Raw Task Logit Diff in 160M Model Over Training Time (Log Scale)')

# The greater_than task is read under the key 'prob_diff' rather than
# 'probability_diff'.
data_series = [
    _model[_name]['prob_diff' if _name == 'greater_than' else 'probability_diff']
    for _name in _tasks
]
plot_lines(data_series, labels, colors, x_vals=ckpts, x_title='Steps', y_title='Metric Value', title='Task Probability Diff in 160M Model Over Training Time (Log Scale)')
# Pythia-410M: per-task metric overviews followed by cross-task comparisons.
# All plots use the training checkpoints as x values on a log-scaled axis.
ckpts = get_checkpoint_names("exp_plus_detail")
_model = metrics['pythia-410m']

# Each entry: task key, title of the combined overview plot, and the
# (metric key, title) pairs drawn afterwards as individual line plots.
_sections = (
    (
        'ioi',
        'IOI Metrics in 410M Model Over Training Time (Log Scale)',
        (
            ('logit_diff', "Pythia-410M IOI Logit Diff Over Training Time (Log Scale)"),
            ('accuracy', "Pythia-410M Accuracy Over Training Time (Log Scale)"),
        ),
    ),
    (
        'greater_than',
        'Greater-Than Metrics in 410M Model Over Training Time (Log Scale)',
        (
            ('logit_diff', "Pythia-410M Greater-Than Logit Diff Over Training Time (Log Scale)"),
        ),
    ),
    (
        'sentiment_cont',
        'Sentiment Cont Metrics in 410M Model Over Training Time (Log Scale)',
        (
            ('logit_diff', "Pythia-410M Simple Sentiment Logit Diff Over Training Time (Log Scale)"),
            ('accuracy', "Pythia-410M Simple Sentiment Accuracy Over Training Time (Log Scale)"),
        ),
    ),
    (
        'sentiment_class',
        'Sentiment Classification Metrics in 410M Model Over Training Time (Log Scale)',
        (
            ('logit_diff', "Pythia-410M Sentiment Classification Logit Diff Over Training Time (Log Scale)"),
            ('accuracy', "Pythia-410M Sentiment Classification Accuracy Over Training Time (Log Scale)"),
        ),
    ),
)

for _name, _overview_title, _detail_plots in _sections:
    _task = _model[_name]
    _task['normed_logit_diff'] = _task['logit_diff'] / _task['logit_diff'].max()
    if _name == 'greater_than':
        # Only two series are plotted for this task; plot_lines zips its
        # inputs, so the surplus colours below are simply ignored.
        data_series = [_task['normed_logit_diff'], _task['prob_diff']]
        labels = ['Normed Logit Diff', 'Probability Diff']
    else:
        _task['normed_accuracy'] = (_task['accuracy'] - 0.5) / (
            _task['accuracy'].max() - _task['accuracy'].min()
        )
        data_series = [
            _task[_key]
            for _key in ('normed_logit_diff', 'normed_accuracy', 'rank_0', 'probability_mass', 'probability_diff')
        ]
        labels = ['Normed Logit Diff', 'Normed Accuracy', 'Correct Rank0 Rate', 'Probability Mass', 'Probability Diff']
    colors = ['red', 'blue', 'green', 'orange', 'purple']
    plot_lines(data_series, labels, colors, x_vals=ckpts, x_title='Steps', y_title='Metric Value', title=_overview_title)
    for _key, _title in _detail_plots:
        l_line(_task[_key], title=_title, x=ckpts, log_x=True)

# Cross-task comparisons: one line per task on a shared figure.
_tasks = ('ioi', 'greater_than', 'sentiment_cont', 'sentiment_class')
labels = ['IOI', 'Greater-Than', 'Sentiment Cont', 'Sentiment Class']
colors = ['red', 'blue', 'green', 'orange']

data_series = [_model[_name]['normed_logit_diff'] for _name in _tasks]
plot_lines(data_series, labels, colors, x_vals=ckpts, x_title='Steps', y_title='Metric Value', title='Normalized Task Logit Diff in 410M Model Over Training Time (Log Scale)')

data_series = [_model[_name]['logit_diff'] for _name in _tasks]
plot_lines(data_series, labels, colors, x_vals=ckpts, x_title='Steps', y_title='Metric Value', title='Raw Task Logit Diff in 410M Model Over Training Time (Log Scale)')

# The greater_than task is read under the key 'prob_diff' rather than
# 'probability_diff'.
data_series = [
    _model[_name]['prob_diff' if _name == 'greater_than' else 'probability_diff']
    for _name in _tasks
]
plot_lines(data_series, labels, colors, x_vals=ckpts, x_title='Steps', y_title='Metric Value', title='Task Probability Diff in 410M Model Over Training Time (Log Scale)')
def rescale_tensor(tensor):
    """
    Min-max rescale a tensor so its values span 0 to 1.

    Intended for values that usually lie between 0.5 and 1.

    Args:
        tensor (torch.Tensor): The tensor to be rescaled.
    Returns:
        torch.Tensor: `(tensor - min) / (max - min)`, or a tensor of zeros
        with the same shape when max and min do not differ by a positive
        amount.
    """
    lowest = tensor.min()
    spread = tensor.max() - lowest
    if spread > 0:
        return (tensor - lowest) / spread
    # Every value is the same, so there is no range to stretch.
    return torch.zeros_like(tensor)
# Pythia-1.4B: per-task metric overviews followed by cross-task comparisons.
# All plots use the training checkpoints as x values on a log-scaled axis.
ckpts = get_checkpoint_names("exp_plus_detail")
_model = metrics['pythia-1.4b']

# Each entry: task key, title of the combined overview plot, and the
# (metric key, title) pairs drawn afterwards as individual line plots.
_sections = (
    (
        'ioi',
        'IOI Metrics in 1.4B Model Over Training Time (Log Scale)',
        (
            ('logit_diff', "Pythia-1.4B IOI Logit Diff Over Training Time (Log Scale)"),
            ('accuracy', "Pythia-1.4B Accuracy Over Training Time (Log Scale)"),
        ),
    ),
    (
        'greater_than',
        'Greater-Than Metrics in 1.4B Model Over Training Time (Log Scale)',
        (
            ('logit_diff', "Pythia-1.4B Greater-Than Logit Diff Over Training Time (Log Scale)"),
        ),
    ),
    (
        'sentiment_cont',
        'Sentiment Cont Metrics in 1.4B Model Over Training Time (Log Scale)',
        (
            ('logit_diff', "Pythia-1.4B Simple Sentiment Logit Diff Over Training Time (Log Scale)"),
            ('accuracy', "Pythia-1.4B Simple Sentiment Accuracy Over Training Time (Log Scale)"),
        ),
    ),
    (
        'sentiment_class',
        'Sentiment Classification Metrics in 1.4B Model Over Training Time (Log Scale)',
        (
            ('logit_diff', "Pythia-1.4B Sentiment Classification Logit Diff Over Training Time (Log Scale)"),
            ('accuracy', "Pythia-1.4B Sentiment Classification Accuracy Over Training Time (Log Scale)"),
        ),
    ),
)

for _name, _overview_title, _detail_plots in _sections:
    _task = _model[_name]
    _task['normed_logit_diff'] = _task['logit_diff'] / _task['logit_diff'].max()
    if _name == 'greater_than':
        # Only two series are plotted for this task; plot_lines zips its
        # inputs, so the surplus colours below are simply ignored.
        data_series = [_task['normed_logit_diff'], _task['prob_diff']]
        labels = ['Normed Logit Diff', 'Probability Diff']
    else:
        _task['normed_accuracy'] = rescale_tensor(_task['accuracy'])
        data_series = [
            _task[_key]
            for _key in ('normed_logit_diff', 'normed_accuracy', 'rank_0', 'probability_mass', 'probability_diff')
        ]
        labels = ['Normed Logit Diff', 'Normed Accuracy', 'Correct Rank0 Rate', 'Probability Mass', 'Probability Diff']
    colors = ['red', 'blue', 'green', 'orange', 'purple']
    plot_lines(data_series, labels, colors, x_vals=ckpts, x_title='Steps', y_title='Metric Value', title=_overview_title)
    for _key, _title in _detail_plots:
        l_line(_task[_key], title=_title, x=ckpts, log_x=True)

# Cross-task comparisons: one line per task on a shared figure.
_tasks = ('ioi', 'greater_than', 'sentiment_cont', 'sentiment_class')
labels = ['IOI', 'Greater-Than', 'Sentiment Cont', 'Sentiment Class']
colors = ['red', 'blue', 'green', 'orange']

data_series = [_model[_name]['normed_logit_diff'] for _name in _tasks]
plot_lines(data_series, labels, colors, x_vals=ckpts, x_title='Steps', y_title='Metric Value', title='Normalized Task Logit Diff in 1.4B Model Over Training Time (Log Scale)')

data_series = [_model[_name]['logit_diff'] for _name in _tasks]
plot_lines(data_series, labels, colors, x_vals=ckpts, x_title='Steps', y_title='Metric Value', title='Raw Task Logit Diff in 1.4B Model Over Training Time (Log Scale)')

# The greater_than task is read under the key 'prob_diff' rather than
# 'probability_diff'.
data_series = [
    _model[_name]['prob_diff' if _name == 'greater_than' else 'probability_diff']
    for _name in _tasks
]
plot_lines(data_series, labels, colors, x_vals=ckpts, x_title='Steps', y_title='Metric Value', title='Task Probability Diff in 1.4B Model Over Training Time (Log Scale)')
# Pythia-2.8b: metric overviews for the IOI, greater-than and sentiment
# continuation tasks. All plots use the training checkpoints as x values on
# a log-scaled axis.
ckpts = get_checkpoint_names("exp_plus_detail")
_model = metrics['pythia-2.8b']

# Each entry: task key, title of the combined overview plot, and the
# (metric key, title) pairs drawn afterwards as individual line plots.
_sections = (
    (
        'ioi',
        'IOI Metrics in 2.8B Model Over Training Time (Log Scale)',
        (
            ('logit_diff', "Pythia-2.8b IOI Logit Diff Over Training Time (Log Scale)"),
            ('accuracy', "Pythia-2.8b Accuracy Over Training Time (Log Scale)"),
        ),
    ),
    (
        'greater_than',
        'Greater-Than Metrics in 2.8b Model Over Training Time (Log Scale)',
        (
            ('logit_diff', "Pythia-2.8b Greater-Than Logit Diff Over Training Time (Log Scale)"),
        ),
    ),
    (
        'sentiment_cont',
        'Sentiment Cont Metrics in 2.8b Model Over Training Time (Log Scale)',
        (
            ('logit_diff', "Pythia-2.8b Sentiment Completion Logit Diff Over Training Time (Log Scale)"),
            ('accuracy', "Pythia-2.8b Sentiment Completion Accuracy Over Training Time (Log Scale)"),
        ),
    ),
)

for _name, _overview_title, _detail_plots in _sections:
    _task = _model[_name]
    _task['normed_logit_diff'] = _task['logit_diff'] / _task['logit_diff'].max()
    if _name == 'greater_than':
        # Only two series are plotted for this task; plot_lines zips its
        # inputs, so the surplus colours below are simply ignored.
        data_series = [_task['normed_logit_diff'], _task['prob_diff']]
        labels = ['Normed Logit Diff', 'Probability Diff']
    else:
        _task['normed_accuracy'] = rescale_tensor(_task['accuracy'])
        data_series = [
            _task[_key]
            for _key in ('normed_logit_diff', 'normed_accuracy', 'rank_0', 'probability_mass', 'probability_diff')
        ]
        labels = ['Normed Logit Diff', 'Normed Accuracy', 'Correct Rank0 Rate', 'Probability Mass', 'Probability Diff']
    colors = ['red', 'blue', 'green', 'orange', 'purple']
    plot_lines(data_series, labels, colors, x_vals=ckpts, x_title='Steps', y_title='Metric Value', title=_overview_title)
    for _key, _title in _detail_plots:
        l_line(_task[_key], title=_title, x=ckpts, log_x=True)
# Pythia-2.8b, sentiment classification task: derive the normalised metrics,
# draw the combined overview plot, then the raw logit-diff and accuracy
# curves. All plots use the training checkpoints on a log-scaled axis.
_task = metrics['pythia-2.8b']['sentiment_class']
_task['normed_logit_diff'] = _task['logit_diff'] / _task['logit_diff'].max()
_task['normed_accuracy'] = rescale_tensor(_task['accuracy'])

ckpts = get_checkpoint_names("exp_plus_detail")
data_series = [
    _task['normed_logit_diff'],
    _task['normed_accuracy'],
    _task['rank_0'],
    _task['probability_mass'],
    _task['probability_diff'],
]
labels = ['Normed Logit Diff', 'Normed Accuracy', 'Correct Rank0 Rate', 'Probability Mass', 'Probability Diff']
colors = ['red', 'blue', 'green', 'orange', 'purple']
plot_lines(data_series, labels, colors, x_vals=ckpts, x_title='Steps', y_title='Metric Value', title='Sentiment Classification Metrics in 2.8b Model Over Training Time (Log Scale)')

l_line(_task['logit_diff'], title="Pythia-2.8b Sentiment Classification Logit Diff Over Training Time (Log Scale)", x=ckpts, log_x=True)
# Use the local l_line helper (not neel_plotly's `line`) so this figure gets
# the same fixed 1200x500 layout as every other per-metric plot.
l_line(_task['accuracy'], title="Pythia-2.8b Sentiment Classification Accuracy Over Training Time (Log Scale)", x=ckpts, log_x=True)
# Pythia-2.8b cross-task comparisons: one line per task on a shared figure,
# reusing the `ckpts` list assigned earlier in the script.
_model = metrics['pythia-2.8b']
_tasks = ('ioi', 'greater_than', 'sentiment_cont', 'sentiment_class')
labels = ['IOI', 'Greater-Than', 'Sentiment Cont', 'Sentiment Class']
colors = ['red', 'blue', 'green', 'orange']

data_series = [_model[_name]['normed_logit_diff'] for _name in _tasks]
plot_lines(data_series, labels, colors, x_vals=ckpts, x_title='Steps', y_title='Metric Value', title='Normalized Task Logit Diff in 2.8b Model Over Training Time (Log Scale)')

data_series = [_model[_name]['logit_diff'] for _name in _tasks]
plot_lines(data_series, labels, colors, x_vals=ckpts, x_title='Steps', y_title='Metric Value', title='Raw Task Logit Diff in 2.8b Model Over Training Time (Log Scale)')

# The greater_than task is read under the key 'prob_diff' rather than
# 'probability_diff'.
data_series = [
    _model[_name]['prob_diff' if _name == 'greater_than' else 'probability_diff']
    for _name in _tasks
]
plot_lines(data_series, labels, colors, x_vals=ckpts, x_title='Steps', y_title='Metric Value', title='Task Probability Diff in 2.8b Model Over Training Time (Log Scale)')
# IOI metrics across model sizes: one figure per metric, one line per model.
# The 6.9B model is currently excluded (no label/color entry for it).
labels = ['160M', '410M', '1.4B', '2.8B']
colors = ['red', 'blue', 'green', 'orange']
ioi_model_keys = ('pythia-160m', 'pythia-410m', 'pythia-1.4b', 'pythia-2.8b')

# (metric key, y-axis title, figure title). The y-axis title names the metric
# being plotted so that only the logit-diff figure is labelled 'Logit Diff'.
ioi_figures = [
    ('logit_diff', 'Logit Diff', 'IOI Logit Diff Over Training Time (Log Scale)'),
    ('accuracy', 'Accuracy', 'IOI Accuracy Over Training Time (Log Scale)'),
    ('rank_0', 'Rank 0 Rate', 'IOI Rank 0 Rate Over Training Time (Log Scale)'),
    ('probability_mass', 'Probability Mass', 'IOI Probability Mass Over Training Time (Log Scale)'),
    ('probability_diff', 'Probability Diff', 'IOI Probability Diff Over Training Time (Log Scale)'),
]

for metric_key, y_axis_title, figure_title in ioi_figures:
    # Look the series up per figure so a missing key only affects that figure.
    data_series = [metrics[model_key]['ioi'][metric_key] for model_key in ioi_model_keys]
    plot_lines(
        data_series,
        labels,
        colors,
        x_vals=ckpts,
        x_title='Steps',
        y_title=y_axis_title,
        title=figure_title,
    )
# Greater-Than metrics across model sizes: one figure per metric, one line
# per model. The 6.9B model is currently excluded.
labels = ['160M', '410M', '1.4B', '2.8B']
colors = ['red', 'blue', 'green', 'orange']
gt_model_keys = ('pythia-160m', 'pythia-410m', 'pythia-1.4b', 'pythia-2.8b')

# (metric key, y-axis title, figure title)
gt_figures = [
    ('logit_diff', 'Logit Diff', 'Greater-Than Logit Diff Over Training Time (Log Scale)'),
    ('prob_diff', 'Probability Diff', 'Greater-Than Probability Diff Over Training Time (Log Scale)'),
]

for metric_key, y_axis_title, figure_title in gt_figures:
    data_series = [metrics[model_key]['greater_than'][metric_key] for model_key in gt_model_keys]
    plot_lines(
        data_series,
        labels,
        colors,
        x_vals=ckpts,
        x_title='Steps',
        y_title=y_axis_title,
        title=figure_title,
    )
# Sentiment-continuation metrics across model sizes: one figure per metric,
# one line per model. The 6.9B model is currently excluded.
labels = ['160M', '410M', '1.4B', '2.8B']
colors = ['red', 'blue', 'green', 'orange']
sc_model_keys = ('pythia-160m', 'pythia-410m', 'pythia-1.4b', 'pythia-2.8b')

# (metric key, y-axis title, figure title)
sc_figures = [
    ('logit_diff', 'Logit Diff', 'Sentiment Cont Logit Diff Over Training Time (Log Scale)'),
    ('accuracy', 'Accuracy', 'Sentiment Cont Accuracy Over Training Time (Log Scale)'),
    ('rank_0', 'Rank 0 Rate', 'Sentiment Cont Rank 0 Rate Over Training Time (Log Scale)'),
    ('probability_diff', 'Probability Diff', 'Sentiment Cont Probability Diff Over Training Time (Log Scale)'),
]

for metric_key, y_axis_title, figure_title in sc_figures:
    data_series = [metrics[model_key]['sentiment_cont'][metric_key] for model_key in sc_model_keys]
    plot_lines(
        data_series,
        labels,
        colors,
        x_vals=ckpts,
        x_title='Steps',
        y_title=y_axis_title,
        title=figure_title,
    )
# Sentiment-classification metrics across model sizes: one figure per metric,
# one line per model. The 6.9B model is currently excluded.
labels = ['160M', '410M', '1.4B', '2.8B']
colors = ['red', 'blue', 'green', 'orange']
scls_model_keys = ('pythia-160m', 'pythia-410m', 'pythia-1.4b', 'pythia-2.8b')

# (metric key, y-axis title, figure title), in the order the figures are drawn.
scls_figures = [
    ('probability_mass', 'Probability Mass', 'Sentiment Class Probability Mass Over Training Time (Log Scale)'),
    ('logit_diff', 'Logit Diff', 'Sentiment Class Logit Diff Over Training Time (Log Scale)'),
    ('accuracy', 'Accuracy', 'Sentiment Class Accuracy Over Training Time (Log Scale)'),
    ('rank_0', 'Rank 0 Rate', 'Sentiment Class Rank 0 Rate Over Training Time (Log Scale)'),
    ('probability_diff', 'Probability Diff', 'Sentiment Class Probability Diff Over Training Time (Log Scale)'),
]

for metric_key, y_axis_title, figure_title in scls_figures:
    data_series = [metrics[model_key]['sentiment_class'][metric_key] for model_key in scls_model_keys]
    plot_lines(
        data_series,
        labels,
        colors,
        x_vals=ckpts,
        x_title='Steps',
        y_title=y_axis_title,
        title=figure_title,
    )
# Sentiment-classification probability mass across model sizes.
# The plotted series are read from the 'sentiment_class' task, so the figure
# title names that task.
# NOTE(review): if an IOI figure was intended here instead, switch the task
# key to 'ioi' and use an IOI title.
data_series = [
    metrics[model_key]['sentiment_class']['probability_mass']
    for model_key in ('pythia-160m', 'pythia-410m', 'pythia-1.4b', 'pythia-2.8b')
]
labels = ['160M', '410M', '1.4B', '2.8B']
colors = ['red', 'blue', 'green', 'orange']
plot_lines(
    data_series,
    labels,
    colors,
    x_vals=ckpts,
    x_title='Steps',
    y_title='Probability Mass',
    title='Sentiment Class Probability Mass Over Training Time (Log Scale)',
)
# IOI metrics for the canonical 160M model and its three 'seed' variants.
# The x axis uses the "exp_plus_detail" checkpoint schedule.
ckpts = get_checkpoint_names("exp_plus_detail")

labels = ['Canonical', 'Seed 1', 'Seed 2', 'Seed 3']
colors = ['red', 'blue', 'green', 'orange']
seed_model_keys = ('pythia-160m', 'pythia-160m-seed1', 'pythia-160m-seed2', 'pythia-160m-seed3')

# (metric key, y-axis title, figure title)
seed_ioi_figures = [
    ('logit_diff', 'Logit Diff', 'IOI Logit Diff in 160M Model Over Training Time (Log Scale)'),
    ('accuracy', 'Accuracy', 'IOI Accuracy in 160M Model Over Training Time (Log Scale)'),
    ('rank_0', 'Mean Rank 0 Rate', 'IOI Rank 0 Rate in 160M Model Over Training Time (Log Scale)'),
    ('probability_mass', 'Probability Mass', 'IOI Probability Mass in 160M Model Over Training Time (Log Scale)'),
    ('probability_diff', 'Probability Diff', 'IOI Probability Diff in 160M Model Over Training Time (Log Scale)'),
]

for metric_key, y_axis_title, figure_title in seed_ioi_figures:
    data_series = [metrics[model_key]['ioi'][metric_key] for model_key in seed_model_keys]
    plot_lines(
        data_series,
        labels,
        colors,
        x_vals=ckpts,
        x_title='Steps',
        y_title=y_axis_title,
        title=figure_title,
    )
# Greater-Than metrics for the canonical 160M model and its three 'seed'
# variants: one figure per metric, one line per model.
labels = ['Canonical', 'Seed 1', 'Seed 2', 'Seed 3']
colors = ['red', 'blue', 'green', 'orange']
seed_model_keys = ('pythia-160m', 'pythia-160m-seed1', 'pythia-160m-seed2', 'pythia-160m-seed3')

# (metric key, y-axis title, figure title)
seed_gt_figures = [
    ('logit_diff', 'Logit Diff', 'Greater-Than Logit Diff in 160M Model Over Training Time (Log Scale)'),
    ('prob_diff', 'Probability Diff', 'Greater-Than Probability Diff in 160M Model Over Training Time (Log Scale)'),
]

for metric_key, y_axis_title, figure_title in seed_gt_figures:
    data_series = [metrics[model_key]['greater_than'][metric_key] for model_key in seed_model_keys]
    plot_lines(
        data_series,
        labels,
        colors,
        x_vals=ckpts,
        x_title='Steps',
        y_title=y_axis_title,
        title=figure_title,
    )
# Sentiment-continuation metrics for the canonical 160M model and its three
# 'seed' variants: one figure per metric, one line per model.
labels = ['Canonical', 'Seed 1', 'Seed 2', 'Seed 3']
colors = ['red', 'blue', 'green', 'orange']
seed_model_keys = ('pythia-160m', 'pythia-160m-seed1', 'pythia-160m-seed2', 'pythia-160m-seed3')

# (metric key, y-axis title, figure title)
seed_sc_figures = [
    ('logit_diff', 'Logit Diff', 'Sentiment Cont Logit Diff in 160M Models Over Training Time (Log Scale)'),
    ('accuracy', 'Accuracy', 'Sentiment Cont Accuracy in 160M Models Over Training Time (Log Scale)'),
    ('rank_0', 'Rank 0 Rate', 'Sentiment Cont Rank 0 Rate in 160M Models Over Training Time (Log Scale)'),
    ('probability_diff', 'Probability Diff', 'Sentiment Cont Probability Diff in 160M Models Over Training Time (Log Scale)'),
]

for metric_key, y_axis_title, figure_title in seed_sc_figures:
    data_series = [metrics[model_key]['sentiment_cont'][metric_key] for model_key in seed_model_keys]
    plot_lines(
        data_series,
        labels,
        colors,
        x_vals=ckpts,
        x_title='Steps',
        y_title=y_axis_title,
        title=figure_title,
    )
# Sentiment-classification metrics for the canonical 160M model and its three
# 'seed' variants: one figure per metric, one line per model.
labels = ['Canonical', 'Seed 1', 'Seed 2', 'Seed 3']
colors = ['red', 'blue', 'green', 'orange']
seed_model_keys = ('pythia-160m', 'pythia-160m-seed1', 'pythia-160m-seed2', 'pythia-160m-seed3')

# (metric key, y-axis title, figure title)
seed_scls_figures = [
    ('logit_diff', 'Logit Diff', 'Sentiment Class Logit Diff in 160M Models Over Training Time (Log Scale)'),
    ('accuracy', 'Accuracy', 'Sentiment Class Accuracy in 160M Models Over Training Time (Log Scale)'),
    ('rank_0', 'Rank 0 Rate', 'Sentiment Class Rank 0 Rate in 160M Models Over Training Time (Log Scale)'),
    ('probability_diff', 'Probability Diff', 'Sentiment Class Probability Diff in 160M Models Over Training Time (Log Scale)'),
]

for metric_key, y_axis_title, figure_title in seed_scls_figures:
    data_series = [metrics[model_key]['sentiment_class'][metric_key] for model_key in seed_model_keys]
    plot_lines(
        data_series,
        labels,
        colors,
        x_vals=ckpts,
        x_title='Steps',
        y_title=y_axis_title,
        title=figure_title,
    )
# IOI metrics for the canonical 160M model and its three 'weight-seed'
# variants. The x axis uses the "exp_plus_detail" checkpoint schedule.
# NOTE(review): the figure titles do not mention the weight-seed variants.
ckpts = get_checkpoint_names("exp_plus_detail")

labels = ['Canonical', 'Seed 1', 'Seed 2', 'Seed 3']
colors = ['red', 'blue', 'green', 'orange']
weight_seed_model_keys = (
    'pythia-160m',
    'pythia-160m-weight-seed1',
    'pythia-160m-weight-seed2',
    'pythia-160m-weight-seed3',
)

# (metric key, y-axis title, figure title)
weight_seed_ioi_figures = [
    ('logit_diff', 'Logit Diff', 'IOI Logit Diff in 160M Model Over Training Time (Log Scale)'),
    ('accuracy', 'Accuracy', 'IOI Accuracy in 160M Model Over Training Time (Log Scale)'),
    ('rank_0', 'Mean Rank 0 Rate', 'IOI Rank 0 Rate in 160M Model Over Training Time (Log Scale)'),
    ('probability_mass', 'Probability Mass', 'IOI Probability Mass in 160M Model Over Training Time (Log Scale)'),
    ('probability_diff', 'Probability Diff', 'IOI Probability Diff in 160M Model Over Training Time (Log Scale)'),
]

for metric_key, y_axis_title, figure_title in weight_seed_ioi_figures:
    data_series = [metrics[model_key]['ioi'][metric_key] for model_key in weight_seed_model_keys]
    plot_lines(
        data_series,
        labels,
        colors,
        x_vals=ckpts,
        x_title='Steps',
        y_title=y_axis_title,
        title=figure_title,
    )
# Greater-Than metrics for the canonical 160M model and its three
# 'weight-seed' variants: one figure per metric, one line per model.
labels = ['Canonical', 'Seed 1', 'Seed 2', 'Seed 3']
colors = ['red', 'blue', 'green', 'orange']
weight_seed_model_keys = (
    'pythia-160m',
    'pythia-160m-weight-seed1',
    'pythia-160m-weight-seed2',
    'pythia-160m-weight-seed3',
)

# (metric key, y-axis title, figure title)
weight_seed_gt_figures = [
    ('logit_diff', 'Logit Diff', 'Greater-Than Logit Diff in 160M Model Over Training Time (Log Scale)'),
    ('prob_diff', 'Probability Diff', 'Greater-Than Probability Diff in 160M Model Over Training Time (Log Scale)'),
]

for metric_key, y_axis_title, figure_title in weight_seed_gt_figures:
    data_series = [metrics[model_key]['greater_than'][metric_key] for model_key in weight_seed_model_keys]
    plot_lines(
        data_series,
        labels,
        colors,
        x_vals=ckpts,
        x_title='Steps',
        y_title=y_axis_title,
        title=figure_title,
    )
# Sentiment-continuation metrics for the canonical 160M model and its three
# 'weight-seed' variants: one figure per metric, one line per model.
labels = ['Canonical', 'Seed 1', 'Seed 2', 'Seed 3']
colors = ['red', 'blue', 'green', 'orange']
weight_seed_model_keys = (
    'pythia-160m',
    'pythia-160m-weight-seed1',
    'pythia-160m-weight-seed2',
    'pythia-160m-weight-seed3',
)

# (metric key, y-axis title, figure title)
weight_seed_sc_figures = [
    ('logit_diff', 'Logit Diff', 'Sentiment Cont Logit Diff in 160M Models Over Training Time (Log Scale)'),
    ('accuracy', 'Accuracy', 'Sentiment Cont Accuracy in 160M Models Over Training Time (Log Scale)'),
    ('rank_0', 'Rank 0 Rate', 'Sentiment Cont Rank 0 Rate in 160M Models Over Training Time (Log Scale)'),
    ('probability_diff', 'Probability Diff', 'Sentiment Cont Probability Diff in 160M Models Over Training Time (Log Scale)'),
]

for metric_key, y_axis_title, figure_title in weight_seed_sc_figures:
    data_series = [metrics[model_key]['sentiment_cont'][metric_key] for model_key in weight_seed_model_keys]
    plot_lines(
        data_series,
        labels,
        colors,
        x_vals=ckpts,
        x_title='Steps',
        y_title=y_axis_title,
        title=figure_title,
    )
# Sentiment-classification metrics for the canonical 160M model and its three
# 'weight-seed' variants: one figure per metric, one line per model.
labels = ['Canonical', 'Seed 1', 'Seed 2', 'Seed 3']
colors = ['red', 'blue', 'green', 'orange']
weight_seed_model_keys = (
    'pythia-160m',
    'pythia-160m-weight-seed1',
    'pythia-160m-weight-seed2',
    'pythia-160m-weight-seed3',
)

# (metric key, y-axis title, figure title)
weight_seed_scls_figures = [
    ('logit_diff', 'Logit Diff', 'Sentiment Class Logit Diff in 160M Models Over Training Time (Log Scale)'),
    ('accuracy', 'Accuracy', 'Sentiment Class Accuracy in 160M Models Over Training Time (Log Scale)'),
    ('rank_0', 'Rank 0 Rate', 'Sentiment Class Rank 0 Rate in 160M Models Over Training Time (Log Scale)'),
    ('probability_diff', 'Probability Diff', 'Sentiment Class Probability Diff in 160M Models Over Training Time (Log Scale)'),
]

for metric_key, y_axis_title, figure_title in weight_seed_scls_figures:
    data_series = [metrics[model_key]['sentiment_class'][metric_key] for model_key in weight_seed_model_keys]
    plot_lines(
        data_series,
        labels,
        colors,
        x_vals=ckpts,
        x_title='Steps',
        y_title=y_axis_title,
        title=figure_title,
    )
# IOI metrics for the canonical 160M model and its three 'data-seed'
# variants. The x axis uses the "exp_plus_detail" checkpoint schedule.
# NOTE(review): the figure titles do not mention the data-seed variants.
ckpts = get_checkpoint_names("exp_plus_detail")

labels = ['Canonical', 'Seed 1', 'Seed 2', 'Seed 3']
colors = ['red', 'blue', 'green', 'orange']
data_seed_model_keys = (
    'pythia-160m',
    'pythia-160m-data-seed1',
    'pythia-160m-data-seed2',
    'pythia-160m-data-seed3',
)

# (metric key, y-axis title, figure title)
data_seed_ioi_figures = [
    ('logit_diff', 'Logit Diff', 'IOI Logit Diff in 160M Models Over Training Time (Log Scale)'),
    ('accuracy', 'Accuracy', 'IOI Accuracy in 160M Models Over Training Time (Log Scale)'),
    ('rank_0', 'Mean Rank 0 Rate', 'IOI Rank 0 Rate in 160M Models Over Training Time (Log Scale)'),
    ('probability_mass', 'Probability Mass', 'IOI Probability Mass in 160M Models Over Training Time (Log Scale)'),
    ('probability_diff', 'Probability Diff', 'IOI Probability Diff in 160M Models Over Training Time (Log Scale)'),
]

for metric_key, y_axis_title, figure_title in data_seed_ioi_figures:
    data_series = [metrics[model_key]['ioi'][metric_key] for model_key in data_seed_model_keys]
    plot_lines(
        data_series,
        labels,
        colors,
        x_vals=ckpts,
        x_title='Steps',
        y_title=y_axis_title,
        title=figure_title,
    )
# Greater-Than metrics for the canonical 160M model and its three 'data-seed'
# variants: one figure per metric, one line per model.
labels = ['Canonical', 'Seed 1', 'Seed 2', 'Seed 3']
colors = ['red', 'blue', 'green', 'orange']
data_seed_model_keys = (
    'pythia-160m',
    'pythia-160m-data-seed1',
    'pythia-160m-data-seed2',
    'pythia-160m-data-seed3',
)

# (metric key, y-axis title, figure title)
data_seed_gt_figures = [
    ('logit_diff', 'Logit Diff', 'Greater-Than Logit Diff in 160M Models Over Training Time (Log Scale)'),
    ('prob_diff', 'Probability Diff', 'Greater-Than Probability Diff in 160M Models Over Training Time (Log Scale)'),
]

for metric_key, y_axis_title, figure_title in data_seed_gt_figures:
    data_series = [metrics[model_key]['greater_than'][metric_key] for model_key in data_seed_model_keys]
    plot_lines(
        data_series,
        labels,
        colors,
        x_vals=ckpts,
        x_title='Steps',
        y_title=y_axis_title,
        title=figure_title,
    )
# Sentiment-continuation metrics for the canonical 160M model and its three
# 'data-seed' variants: one figure per metric, one line per model.
labels = ['Canonical', 'Seed 1', 'Seed 2', 'Seed 3']
colors = ['red', 'blue', 'green', 'orange']
data_seed_model_keys = (
    'pythia-160m',
    'pythia-160m-data-seed1',
    'pythia-160m-data-seed2',
    'pythia-160m-data-seed3',
)

# (metric key, y-axis title, figure title)
data_seed_sc_figures = [
    ('logit_diff', 'Logit Diff', 'Sentiment Cont Logit Diff in 160M Models Over Training Time (Log Scale)'),
    ('accuracy', 'Accuracy', 'Sentiment Cont Accuracy in 160M Models Over Training Time (Log Scale)'),
    ('rank_0', 'Rank 0 Rate', 'Sentiment Cont Rank 0 Rate in 160M Models Over Training Time (Log Scale)'),
    ('probability_diff', 'Probability Diff', 'Sentiment Cont Probability Diff in 160M Models Over Training Time (Log Scale)'),
]

for metric_key, y_axis_title, figure_title in data_seed_sc_figures:
    data_series = [metrics[model_key]['sentiment_cont'][metric_key] for model_key in data_seed_model_keys]
    plot_lines(
        data_series,
        labels,
        colors,
        x_vals=ckpts,
        x_title='Steps',
        y_title=y_axis_title,
        title=figure_title,
    )
# Sentiment-classification metrics for the canonical 160M model and its three
# 'data-seed' variants: one figure per metric, one line per model.
labels = ['Canonical', 'Seed 1', 'Seed 2', 'Seed 3']
colors = ['red', 'blue', 'green', 'orange']
data_seed_model_keys = (
    'pythia-160m',
    'pythia-160m-data-seed1',
    'pythia-160m-data-seed2',
    'pythia-160m-data-seed3',
)

# (metric key, y-axis title, figure title)
data_seed_scls_figures = [
    ('logit_diff', 'Logit Diff', 'Sentiment Class Logit Diff in 160M Models Over Training Time (Log Scale)'),
    ('accuracy', 'Accuracy', 'Sentiment Class Accuracy in 160M Models Over Training Time (Log Scale)'),
    ('rank_0', 'Rank 0 Rate', 'Sentiment Class Rank 0 Rate in 160M Models Over Training Time (Log Scale)'),
    ('probability_diff', 'Probability Diff', 'Sentiment Class Probability Diff in 160M Models Over Training Time (Log Scale)'),
]

for metric_key, y_axis_title, figure_title in data_seed_scls_figures:
    data_series = [metrics[model_key]['sentiment_class'][metric_key] for model_key in data_seed_model_keys]
    plot_lines(
        data_series,
        labels,
        colors,
        x_vals=ckpts,
        x_title='Steps',
        y_title=y_axis_title,
        title=figure_title,
    )